* ---- Session setup ----------------------------------------------------
* Project root; every path used below is built from this global.
global wd "C:\Users\creto\Desktop\chinese_students_2017"

* Do not pause output at --more-- prompts.
set more off

* Default graph scheme for the figures drawn below.
set scheme s1mono

* Work from the project's Data directory.
cd "$wd/Data"

* ---- City-level controls: 2005 levels and 2005 -> 2011 log changes ----
* Output: lib\tem_city.dta, one row per citycode, holding the 2005 log
* levels (suffix _0) and the 2011-minus-2005 differences (prefix dln).
use "$wd\Data\6.AnalysisData\City_Control&Policy97-14_citycode.dta", clear

generate lngdp  = ln(gdp)
generate lnpop  = ln(pop)
generate lnpgdp = lngdp - lnpop          // log of GDP per capita
keep citycode ln* year

* Stash the 2005 cross-section with every ln* variable suffixed _0.
preserve
    keep if year == 2005
    drop year
    rename ln* ln*_0
    save "$wd\Data\lib\tem.dta", replace      // scratch file
restore

* Attach the 2005 values to the 2011 cross-section; only cities present
* in both years survive the merge.
keep if year == 2011
merge 1:1 citycode using "$wd\Data\lib\tem.dta", keep(match) nogenerate

generate dlngdp  = lngdp  - lngdp_0
generate dlnpgdp = lnpgdp - lnpgdp_0
generate dlnpop  = lnpop  - lnpop_0

keep citycode *_0 dln*
save "$wd\Data\lib\tem_city.dta", replace     // scratch file, merged back in further down

* ---- Gaokao admissions: city-by-year admission shares -----------------
* Loads the admissions file, aggregates every num* count to the
* citycode-year level and converts three of the counts into shares of
* num_adm. Leaves only year, citycode and sh_* in memory.
use "$wd\Data\1.ChinaData\Gaokao\NCEE0511.dta", clear

* city is a string code at this point. Codes whose first two characters
* are 11, 12, 31 or 50 are collapsed to "<prefix>00" so that all their
* rows aggregate into a single unit.
* NOTE(review): presumably these are the four province-level
* municipalities -- confirm against the code book.
gen tem = substr(city, 1, 2)
replace city = tem + "00" if inlist(tem, "11", "12", "31", "50")
drop tem

* Use the full variable name after the rename. The original wrote
* "destring city" and "by(city year)", which only resolved through
* variable-name abbreviation and would fail with -set varabbrev off- or
* if the file carries another variable beginning with "city".
rename city citycode
destring citycode, replace
collapse (sum) num*, by(citycode year)
sort citycode year

* Shares of total admissions (missing when num_adm is 0 or missing).
gen sh_1   = num_first / num_adm
gen sh_211 = num_211   / num_adm
gen sh_985 = num_985   / num_adm
keep year citycode sh_*
* ---- 2005 -> 2011 change in each admission share ----------------------
* Stash the 2005 shares with a trailing 0 appended to each name
* (sh_1 -> sh_10, sh_211 -> sh_2110, sh_985 -> sh_9850).
preserve
    keep if year == 2005
    drop year
    rename sh_* sh_*0
    save "$wd\Data\lib\tem.dta", replace      // scratch file
restore

* Keep the 2011 cross-section and attach the 2005 shares; only cities
* observed in both years are retained.
keep if year == 2011
drop year
merge 1:1 citycode using "$wd\Data\lib\tem.dta", keep(match) nogenerate

* dsh_<k> = share in 2011 minus share in 2005.
foreach k in 1 211 985 {
    gen dsh_`k' = sh_`k' - sh_`k'0
}



			

* ---- Attach the NTR-gap measure and the city controls -----------------
* Both merges keep every row, matched or not (the keep-if-matched filter
* was left disabled), and no _merge indicator is retained.
merge 1:1 citycode using "$wd/Data/6.AnalysisData/IV1_March2019/gapsexpweighted97", nogenerate  // NTRGap
merge 1:1 citycode using "$wd\Data\lib\tem_city.dta", nogenerate  // scratch file

* 2005 population in levels, recovered from its log.
generate total_pop = exp(lnpop_0)

* Placeholders that each figure section overwrites with its own y and x.
generate y = .
generate x = .


* Figure E7 d - f

* Binned scatter: change in the num_first/num_adm share against the
* weighted NTR gap.
replace y = dsh_1
replace x = ntrgap_expweighted

* Cut x into 40 groups and take within-group means of x, y and 2005
* population. sum_pop is a bin MEAN despite its name; it is used as the
* marker weight.
egen x_bin   = cut(x), group(40)
egen mean_x  = mean(x), by(x_bin)
egen mean_y  = mean(y), by(x_bin)
egen sum_pop = mean(total_pop), by(x_bin)

* NOTE(review): the legend label references locals bbb and ttt that are
* not defined in the visible code; the legend is switched off, so the
* label is not drawn.
twoway ///
    (scatter mean_y mean_x [weight=sum_pop],  msymbol(circle_hollow) ///
        saving("$wd\Data\lib\temp1",replace) ///
        ylabel( , gmin gmax format(%5.2f) ) ///
        ytitle("{&Delta} Share of admitted students",size(medium)) ///
        xtitle("PNTR{sub:c} (Weighted NTR Gaps)", size(medium)) ///
        scheme(s1mono)) ///
    (lfit mean_y mean_x , ///
        legend(off order(2) label(2 "Fitted Line;  {&beta}=`bbb'  t-stat=`ttt'")  position(6) ))

* Remove the per-figure helpers so the next section can recreate them.
drop x_bin mean_x mean_y sum_pop
		


* Binned scatter: change in the num_211/num_adm share against the
* weighted NTR gap.
replace y = dsh_211
replace x = ntrgap_expweighted

* 40 groups of x; within-group means of x, y and 2005 population
* (sum_pop holds a mean, not a sum) feed the plot.
egen x_bin   = cut(x), group(40)
egen mean_x  = mean(x), by(x_bin)
egen mean_y  = mean(y), by(x_bin)
egen sum_pop = mean(total_pop), by(x_bin)

* NOTE(review): locals bbb and ttt in the legend label are not defined in
* the visible code; with legend(off) the label is never shown.
twoway ///
    (scatter mean_y mean_x [weight=sum_pop],  msymbol(circle_hollow) ///
        saving("$wd\Data\lib\temp2",replace) ///
        ylabel( , gmin gmax format(%5.2f) ) ///
        ytitle("{&Delta} Share of admitted students",size(medium)) ///
        xtitle("PNTR{sub:c} (Weighted NTR Gaps)", size(medium)) ///
        scheme(s1mono)) ///
    (lfit mean_y mean_x, ///
        legend(off order(2) label(2 "Fitted Line;  {&beta}=`bbb'  t-stat=`ttt'")  position(6) ))

* Clear the per-figure helpers.
drop x_bin mean_x mean_y sum_pop


* Binned scatter: change in the num_985/num_adm share against the
* weighted NTR gap.
replace y = dsh_985
replace x = ntrgap_expweighted

* 40 groups of x; within-group means of x, y and 2005 population
* (sum_pop holds a mean, not a sum) feed the plot.
egen x_bin   = cut(x), group(40)
egen mean_x  = mean(x), by(x_bin)
egen mean_y  = mean(y), by(x_bin)
egen sum_pop = mean(total_pop), by(x_bin)

* Saved as temp3: this section previously reused the file name temp2 and
* so overwrote the dsh_211 panel written just before it.
* NOTE(review): locals bbb and ttt in the legend label are not defined in
* the visible code; with legend(off) the label is never shown.
twoway ///
    (scatter mean_y mean_x [weight=sum_pop],  msymbol(circle_hollow) ///
        saving("$wd\Data\lib\temp3",replace) ///
        ylabel( , gmin gmax format(%5.2f) ) ///
        ytitle("{&Delta} Share of admitted students",size(medium)) ///
        xtitle("PNTR{sub:c} (Weighted NTR Gaps)", size(medium)) ///
        scheme(s1mono)) ///
    (lfit mean_y mean_x, ///
        legend(off order(2) label(2 "Fitted Line;  {&beta}=`bbb'  t-stat=`ttt'")  position(6) ))

* Clear the per-figure helpers.
drop x_bin mean_x mean_y sum_pop


* Figure E7 a - c

* x variable: dlnpgdp, the 2005-2011 change in log per-capita GDP

* Binned scatter: change in the num_first/num_adm share against the
* 2005-2011 change in log per-capita GDP.
replace y = dsh_1
replace x = dlnpgdp

* 40 groups of x; within-group means of x, y and 2005 population
* (sum_pop holds a mean, not a sum) feed the plot.
egen x_bin   = cut(x), group(40)
egen mean_x  = mean(x), by(x_bin)
egen mean_y  = mean(y), by(x_bin)
egen sum_pop = mean(total_pop), by(x_bin)

* Saved as temp4: this section previously wrote to temp1, the file name
* already used by the dsh_1-vs-NTR-gap panel, and so overwrote it.
* NOTE(review): locals bbb and ttt in the legend label are not defined in
* the visible code; with legend(off) the label is never shown.
twoway ///
    (scatter  mean_y mean_x [weight=sum_pop],  msymbol(circle_hollow) ///
        saving("$wd\Data\lib\temp4",replace) ///
        ylabel( , gmin gmax format(%5.2f) ) ///
        ytitle("{&Delta} Share of admitted students",size(medium)) ///
        xtitle("{&Delta}ln(per capita GDP){sub:c}", size(medium)) ///
        scheme(s1mono)) ///
    (lfit mean_y mean_x, ///
        legend(off order(2) label(2 "Fitted Line;  {&beta}=`bbb'  t-stat=`ttt'")  position(6) ))

* Clear the per-figure helpers.
drop x_bin mean_x mean_y sum_pop
		   
				   
* Binned scatter: change in the num_211/num_adm share against the
* 2005-2011 change in log per-capita GDP.
replace y = dsh_211
replace x = dlnpgdp

* 40 groups of x; within-group means of x, y and 2005 population
* (sum_pop holds a mean, not a sum) feed the plot.
egen x_bin   = cut(x), group(40)
egen mean_x  = mean(x), by(x_bin)
egen mean_y  = mean(y), by(x_bin)
egen sum_pop = mean(total_pop), by(x_bin)

* Saved as temp5: this section previously wrote to temp1, a file name
* shared with other panels in this script, so the panels overwrote one
* another on disk.
* NOTE(review): locals bbb and ttt in the legend label are not defined in
* the visible code; with legend(off) the label is never shown.
twoway ///
    (scatter  mean_y mean_x [weight=sum_pop],  msymbol(circle_hollow) ///
        saving("$wd\Data\lib\temp5",replace) ///
        ylabel( , gmin gmax format(%5.2f) ) ///
        ytitle("{&Delta} Share of admitted students",size(medium)) ///
        xtitle("{&Delta}ln(per capita GDP){sub:c}", size(medium)) ///
        scheme(s1mono)) ///
    (lfit mean_y mean_x, ///
        legend(off order(2) label(2 "Fitted Line;  {&beta}=`bbb'  t-stat=`ttt'")  position(6) ))

* Clear the per-figure helpers.
drop x_bin mean_x mean_y sum_pop
		   
		   
* Binned scatter: change in the num_985/num_adm share against the
* 2005-2011 change in log per-capita GDP.
replace y = dsh_985
replace x = dlnpgdp

* 40 groups of x; within-group means of x, y and 2005 population
* (sum_pop holds a mean, not a sum) feed the plot.
egen x_bin   = cut(x), group(40)
egen mean_x  = mean(x), by(x_bin)
egen mean_y  = mean(y), by(x_bin)
egen sum_pop = mean(total_pop), by(x_bin)

* Saved as temp6: this section previously wrote to temp1, a file name
* shared with other panels in this script, so the panels overwrote one
* another on disk.
* NOTE(review): locals bbb and ttt in the legend label are not defined in
* the visible code; with legend(off) the label is never shown.
twoway ///
    (scatter  mean_y mean_x [weight=sum_pop],  msymbol(circle_hollow) ///
        saving("$wd\Data\lib\temp6",replace) ///
        ylabel( , gmin gmax format(%5.2f) ) ///
        ytitle("{&Delta} Share of admitted students",size(medium)) ///
        xtitle("{&Delta}ln(per capita GDP){sub:c}", size(medium)) ///
        scheme(s1mono)) ///
    (lfit mean_y mean_x, ///
        legend(off order(2) label(2 "Fitted Line;  {&beta}=`bbb'  t-stat=`ttt'")  position(6) ))

* Clear the per-figure helpers.
drop x_bin mean_x mean_y sum_pop
		   		   
		    